*******************************************************************
*This file cleans wage data by education level from the Census/ACS*
*******************************************************************

* For each census year: load that year's records, restrict the sample,
* assign education groups, build wage and hours variables, attach commuting
* zones through a geography crosswalk, collapse to czone-by-group means and
* save one wide file per year.
foreach census_year in 1970 1990 2000 2008 {

    use if year==`census_year' using "$raw_data_lmarket/ipums_census.dta", clear

    * Sample restrictions:
    *   - drop group-quarters types coded 100-499 (institutional)
    *   - drop state FIPS 2 and 15 (Alaska and Hawaii)
    quietly drop if inrange(gqtyped, 100, 499) | inlist(statefip, 2, 15)

    * Ages 19 through 64 only
    keep if inrange(age, 19, 64)

    * Employed people only
    keep if empstat == 1

    * Education groups built from educd. Any observation whose educd falls
    * outside the three ranges below (for example 84-99) keeps an empty
    * label and is dropped.
    * NOTE(review): confirm that dropping those codes is intended.
    generate group_education = ""
    replace group_education = "_less"      if inrange(educd, 2, 80)
    replace group_education = "_associate" if inrange(educd, 81, 83)
    replace group_education = "_bachelor"  if inrange(educd, 100, 116)
    drop if group_education == ""

    generate group = group_education

    * External sub-file; expected to create hrwage, yrwage and yrhours,
    * which are used immediately below.
    run "$xwalk_wage/subfile to clean wages.do"

    * Wages: logs first, then levels (same creation order as before)
    foreach w in hrwage yrwage {
        generate ipums_`w'_ln = ln(`w') if `w' != .
    }
    foreach w in hrwage yrwage {
        generate ipums_`w' = `w' if `w' != .
    }

    * Hours: log, then level
    generate ipums_yrhours_ln = ln(yrhours) if yrhours != .
    generate ipums_yrhours    = yrhours     if yrhours != .

    keep group cntygp* puma statefip ipums_* perwt

    * Pick the geography key and crosswalk file for this year. The key is
    * built from the county-group or state+PUMA identifiers kept above.
    if `census_year' == 1970 {
        local geo_key ctygrp1970
        local geo_xwalk "$project/xwalks/xwalks_geography/ctygrp1970_czone.dta"
        generate `geo_key' = cntygp97
    }
    else if `census_year' == 1990 {
        local geo_key puma1990
        local geo_xwalk "$project/xwalks/xwalks_geography/puma1990_czone.dta"
        generate `geo_key' = statefip*10000 + puma
    }
    else if inlist(`census_year', 2000, 2008, 2011) {
        local geo_key puma2000
        local geo_xwalk "$project/xwalks/xwalks_geography/puma2000_czone.dta"
        * NOTE(review): PUMA 77777 is recoded to 1801 before building the
        * key, presumably because 77777 is absent from the crosswalk - confirm.
        replace puma = 1801 if puma == 77777
        generate `geo_key' = statefip*10000 + puma
    }

    * Attach commuting zones. joinby forms every pairwise match, so one
    * record can map to several czones; unmatched master records are kept
    * and then caught by the assert.
    joinby `geo_key' using "`geo_xwalk'", unmatched(master)
    assert czone != .

    * Aggregate at the czone level. afac is not among the variables kept
    * above, so it is supplied by the crosswalk file.
    generate full_wt      = afac*perwt
    generate wage_earners = afac*perwt*(ipums_hrwage_ln != .)

    collapse (rawsum) wage_earners (mean) ipums_* [w=full_wt], by(czone group) fast

    * One row per czone, one set of columns per education group
    local wide_stubs wage_earners ipums_hrwage_ln ipums_hrwage ipums_yrwage_ln ipums_yrwage ipums_yrhours_ln ipums_yrhours
    reshape wide `wide_stubs', i(czone) j(group) string

    * Tag every outcome column with the census year
    foreach outcome of varlist ipums_* wage_earners_* {
        rename `outcome' `outcome'_`census_year'
    }

    save "$clean_data_lmarket/czone`census_year'_Wages.dta", replace
}
